 
 * Working directory: every relative path used below resolves against it.
 cd "D:\Dropbox\book_welfare\replication"

 * Book attributes: read only the four columns that are needed, remove exact
 * duplicate rows, and park the result in tempfile b.
 * NOTE(review): tempfile b is not referenced again in the visible part of
 * this script - confirm whether it is still needed.
 use book_id pubyr mshare genre1 using data\book_author_mapping.dta, clear
 duplicates drop
 tempfile b
 save `b'

 * User-id crosswalk: read the CSV and park it in tempfile u, which is merged
 * onto the review data further down.
 import delimited data\user_id_map.csv, clear
 tempfile u
 save `u'
	
* Stack the part files part10m0 ... part10m22 into a single dataset.
* Part 0 is looked up in the working directory first (the original location);
* if it is not found there, fall back to data\, where parts 1-22 are read from.
local part0 "part10m0.dta"
capture confirm file "`part0'"
if _rc {
	local part0 "data\part10m0.dta"
}
use "`part0'", clear

forvalues k = 1/22 {
	append using data\part10m`k'.dta
}

* Attach the user-id crosswalk held in tempfile u. No keep() option is used,
* so unmatched rows from either side stay in memory at this point.
merge m:1 user_id using `u'

* Replace the original user_id by the crosswalk id: the old user_id and
* review_id are dropped and user_id_csv takes over the name user_id.
drop user_id review_id
rename user_id_csv user_id

* Remove the merge indicator so the next merge can create its own.
drop _merge


		* Add the variables stored in user_genre.dta, keyed by user_id (one row
		* per user_id in that file, many rows per user_id in memory).
		* The _merge indicator created here is discarded by the collapse below.
		merge m:1 user_id using   data\user_genre.dta

		* Median of authorgender across all observations currently in memory;
		* it is the cut-off for the two groups below.
		summarize authorgender, detail
		local m = r(p50)

		* dfem  = 1 when authorgender is below the median, 0 when at or above it.
		* dmale = 1 - dfem.
		* Both are left MISSING when authorgender is missing, so that such
		* observations are counted in neither qf nor qm (collapse (sum) skips
		* missing values). Without the if-qualifier they would get dfem = 0 and
		* therefore dmale = 1, i.e. they would be counted in qm.
		gen byte dfem  = (authorgender < `m') if !missing(authorgender)
		gen byte dmale = 1 - dfem

		* Constant used to count observations per cell in the collapse.
		gen x = 1

		* Decile of authorgender (1-10; missing when authorgender is missing)
		* and one 0/1 indicator per decile.
		xtile m10 = authorgender, nq(10)
		forvalues k = 1/10 {
			gen byte qc`k' = x * (m10 == `k')
		}

		* Calendar year taken from ddate.
		* NOTE(review): assumes ddate holds a Stata daily date - confirm.
		gen year = year(ddate)

		* One row per (year, book_id):
		*   q        number of observations in the cell
		*   qf, qm   number with dfem == 1 / dmale == 1
		*   qc1-qc10 number in each authorgender decile
		collapse (count) q=x (sum) qf=dfem qm=dmale qc*, by(year book_id)
		
		

		
	
	* Attach the variables from books.dta to every (year, book_id) row and
	* retain only rows found in both datasets; no _merge variable is left behind.
	merge m:1 book_id using  data\books.dta, keep(match) nogenerate

	* Give books without a genre an explicit label, then number the genres.
	replace genre1 = "missing" if genre1 == ""
	egen gno = group(genre1)

	* 0/1 flags for mshare / pmshare strictly below one half; left missing
	* where the underlying share equals the system missing value.
	gen fbook  = cond(mshare  < .5, 1, 0) if mshare  != .
	gen fbookp = cond(pmshare < .5, 1, 0) if pmshare != .

	
	* save clean\books_year_large.dta, replace 
	
	
	* dself = 1 when the publisher string contains any of the tags below,
	* 0 otherwise (including an empty publisher).
	* The comparison is case-insensitive: publisher is lower-cased before the
	* search, so spellings such as "createspace", "BookBaby" or "KINDLE" are
	* caught in addition to the specific capitalisations listed previously.
	* Matching is by substring, so a tag occurring inside a longer word also
	* counts as a hit.
	local selfpub_tags kindle createspace ibooks ingramspark kobo ///
		smashwords draft2digital lulu bookbaby

	gen byte dself = 0
	foreach tag of local selfpub_tags {
		replace dself = 1 if strpos(lower(publisher), "`tag'") > 0
	}

	* Keep the analysis variables and write the book-by-year file.
	keep book_id year q qf qm pubyr author_id genre1 mshare pmshare fbook fbookp  gno dself qc*

	save data\books_year.dta, replace
